ostn-03-19 vs 03-20 Processing Time Comparison for 7 pairs

In [10]:
# Draw one comparison figure per entry of pair1, matching it with the entry at
# the same position in pair2.
# NOTE(review): assumes both lists hold the same labels in the same order --
# TODO confirm, since a label missing from one run would shift the pairing.
for position, first_run in enumerate(pair1):
    draw_graph(first_run, pair2[position])
png graphs saved in ../../graphs/processing_time/ostn_03_20/s0-leader/
html graphs saved in 
png graphs saved in ../../graphs/processing_time/ostn_03_20/s0-leader/
html graphs saved in 
png graphs saved in ../../graphs/processing_time/ostn_03_20/s0-leader/
html graphs saved in 
png graphs saved in ../../graphs/processing_time/ostn_03_20/s0-leader/
html graphs saved in 
png graphs saved in ../../graphs/processing_time/ostn_03_20/s0-leader/
html graphs saved in 
png graphs saved in ../../graphs/processing_time/ostn_03_20/s0-leader/
html graphs saved in 
png graphs saved in ../../graphs/processing_time/ostn_03_20/s0-leader/
html graphs saved in 
In [1]:
# !pip install pyhmy --upgrade
import json
import pandas as pd
import os
import shutil
import re
import plotly.express as px 
import plotly.graph_objects as go
import numpy as np
from IPython.core.display import display, HTML
In [2]:
def read_data(files, path):
    """Collect the parsed "ds" log records from the zerolog files of a directory.

    Parameters
    ----------
    files : iterable of str
        File names (not full paths) to consider. Names that do not contain
        "zerolog" are skipped.
    path : str
        Directory that holds the files. A trailing separator is optional.

    Returns
    -------
    list
        One decoded JSON object per line containing '"log-topic":"ds"', in the
        order the files and lines were read.

    Raises
    ------
    json.JSONDecodeError
        If a matching line is not valid JSON.
    """
    data = []
    for file in files:
        if "zerolog" not in file:
            continue
        # os.path.join works whether or not `path` ends with a separator;
        # plain string concatenation silently built a wrong file name otherwise.
        with open(os.path.join(path, file), errors='ignore') as f:
            # Iterate the handle directly so a large log is streamed line by
            # line instead of being loaded into memory in one go.
            for line in f:
                if '"log-topic":"ds"' in line:
                    data.append(json.loads(line))
    return data
In [3]:
def data_processing(data):
    """Turn raw log records into a time-ordered DataFrame with a label column.

    Parameters
    ----------
    data : list of dict
        Records providing at least the "time" and "message" keys; any other
        keys are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns "time" (parsed datetimes), "message" (whitespace-stripped) and
        "label" (the text between "ds-" and the following "-" in the message),
        sorted by time ascending then message descending, with a fresh
        0..n-1 index.

    Raises
    ------
    IndexError
        If a message does not contain a "ds-<label>-" fragment.
    """
    label_regex = re.compile('ds-(.*?)-.*?')

    def first_label(message):
        # Only the first match is kept; a message without any match raises.
        return re.findall(label_regex, message)[0]

    frame = pd.DataFrame(data, columns=['time', 'message'])
    frame['message'] = frame['message'].apply(lambda text: text.strip())
    frame['label'] = frame['message'].apply(first_label)
    frame['time'] = pd.to_datetime(frame['time'], format='%Y-%m-%dT%H:%M:%S.%f')

    ordered = frame.sort_values(by=['time', 'message'], ascending=[True, False])
    return ordered.reset_index(drop=True)
In [4]:
def get_time_diff(df):
    pair = []
    for name, group in df.groupby('label'):
        group['time_diff'] = group['time'].diff().dt.microseconds /1000000
        label = group.iloc[0].label
        new = group[group['message'] == 'ds-' + label + "-end"]
        new.reset_index(inplace = True, drop = True)
        pair.append(new)
    return pair
In [5]:
def draw_graph(new, new2):
    """Plot two runs' processing times for one label, then show and save the figure.

    Parameters
    ----------
    new : pandas.DataFrame
        Rows of the first run; its "time", "time_diff" and "label" columns are
        read. The label of its first row names the figure and the output files.
    new2 : pandas.DataFrame
        Rows of the second run; its "time" and "time_diff" columns are read.

    The output folders come from the notebook-level ``html_dir`` and
    ``fig_dir`` variables and are created when missing. The two traces are
    always captioned " 3/19" and " 3/20".
    """
    label = new.iloc[0].label
    html_name = label+"_processing_time_comparison.html"
    html_path = "https://harmony-one.github.io/harmony-log-analysis/" + html_dir.replace("../../docs/", "") + html_name
    png_path = fig_dir

    # Announce the destinations first, with a clickable link to the published page.
    print("png graphs saved in " + png_path)
    print('html graphs saved in ')
    display(HTML("<a href='" + html_path + "' target='_blank'>" + html_path + "</a>"))

    for folder in (html_dir, png_path):
        if not os.path.exists(folder):
            os.makedirs(folder)

    hover_text = "processing time: %{y}<br>" + "UTC Time: %{x}<br>" + "<extra></extra>"

    def make_trace(frame, caption, colour):
        # Both runs share everything except their data, legend caption and colour.
        return go.Scatter(
            x=frame["time"],
            y=frame["time_diff"],
            mode='lines',
            name=label + caption,
            line_color=colour,
            hovertemplate=hover_text,
        )

    traces = [
        make_trace(new, " 3/19", "#00AEE9"),
        make_trace(new2, " 3/20", "#FFA07A"),
    ]

    figure_layout = go.Layout(
        title=label,
        yaxis={"title": 'Processing Time/ seconds'},
        legend_orientation="h",
    )

    fig = go.Figure(data=traces, layout=figure_layout)

    fig.show()
    fig.write_html(html_dir + html_name)
    fig.write_image(png_path + label+"_processing_time_comparison.png", width=1000, height=500)
    
In [6]:
# Input folder holding the node logs of the first run (ostn_03_19).
log_dir_1 = "../../logs/node_logs/ostn_03_19/s0-leader/"
# Output folders read by draw_graph: PNG images go to fig_dir, HTML pages to html_dir.
# NOTE(review): both point at ostn_03_20 although this cell loads the 03_19 logs --
# presumably the comparison is filed under the later run; confirm.
fig_dir = "../../graphs/processing_time/ostn_03_20/s0-leader/"
html_dir = "../../docs/graphs/processing_time/ostn_03_20/s0-leader/"
# Load every "ds" record of the first run and turn it into a time-ordered frame.
files_1 = os.listdir(log_dir_1)
data_1 = read_data(files_1, log_dir_1)
df_1 = data_processing(data_1)
In [7]:
# Same loading steps for the second run (ostn_03_20): list the log folder,
# read the "ds" records, then build the time-ordered frame.
log_dir_2 = "../../logs/node_logs/ostn_03_20/s0-leader/"
files_2 = os.listdir(log_dir_2)
data_2 = read_data(files_2, log_dir_2)
df_2 = data_processing(data_2)
In [8]:
# Standalone integers embedded in a message, in order of appearance.
number_pattern = re.compile(r'\b\d+\b')
digit_pattern = '[0-9]'


def _nth_number(message, position):
    """Return the standalone integer at `position` in `message`, or NaN if absent."""
    numbers = number_pattern.findall(message)
    # The length check covers messages holding fewer numbers than requested;
    # indexing after a mere non-empty check raised IndexError when a message
    # carried exactly one number.
    return int(numbers[position]) if len(numbers) > position else np.nan


# The first standalone integer is stored as the epoch, the second as the block.
df_2['epoch'] = df_2['message'].apply(lambda c: _nth_number(c, 0))
df_2['block'] = df_2['message'].apply(lambda c: _nth_number(c, 1))
# Remove every digit from the message text and trim the leftover whitespace.
# Re-running this cell without rebuilding df_2 finds no numbers any more and
# turns both columns into NaN.
df_2['message'] = df_2['message'].apply(lambda c: re.sub(digit_pattern, '', c).strip())
In [9]:
# One frame of "-end" rows per label for each run; the drawing loop pairs the
# two lists by position.
pair1 = get_time_diff(df_1)
pair2 = get_time_diff(df_2)
/Users/yishuang/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy